1 Introduction

Here, we will cluster the cells of the patient with id “3299”.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)

2.2 Define parameters

# Paths
path_to_obj <- here::here("results/R_objects/4.seurat_leukemic.rds")
path_to_save <- here::here("results/R_objects/5.seurat_clustered_3299.rds")


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "mediumorchid2", "coral2", "blueviolet",
                   "indianred4", "deepskyblue1", "dimgray", "deeppink1",
                   "green", "lightgray", "hotpink1")


# Source functions
source(here::here("bin/utils.R"))


# Thresholds

2.3 Load data

seurat <- readRDS(path_to_obj)

3 Subset and reprocess

Of note, everytime that we filter out a specific population, the set of highly variable genes (HVG) and the axis of variability (PCs) changes. Thus, we need to rerun the main analysis steps iteratively.

First, we focus on the patient studied in this notebook:

seurat <- subset(seurat, donor_id == "3299")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")

In this case we only have one subproject, so we do not need to worry about batch effects. However, we see that the Richter transformation (RT) sample comes from peripheral blood (PB) whilst the other come from bone marrow (BM)

table(seurat$sample_description_FN, seurat$tissue)
##                  
##                     BM   PB
##   posttreatment   4383    0
##   posttreatment_2 1239    0
##   posttreatment_3  676    0
##   RS                 0 2813
DimPlot(seurat, group.by = "tissue")

Thus, we will focus on the samples that come from BM. Since we know that Ferran could assign 80 to 90% of the cells in post-treatment_3 to Richter cells, we will work with them and exclude the RT sample:

seurat <- subset(seurat, tissue == "BM")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "tissue")

DimPlot(seurat, group.by = "time_point")

FeaturePlot(seurat, "pct_mt") + scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "nFeature_RNA") + scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "HBM") + scale_color_viridis_c(option = "magma")

We see a subpopulation with characteristics of low-quality cells: high mitochondrial expression and low number of detected features (genes). In addition, we still see a lingering cluster of erythroblasts (HBM+). Hence, let us exclude both from the dataset:

seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.75)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6298
## Number of edges: 211592
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7657
## Number of communities: 9
## Elapsed time: 0 seconds
DimPlot(seurat)

selected_cells <- colnames(seurat)[!(seurat$seurat_clusters %in% c("7", "8"))]
seurat <- subset(seurat, cells = selected_cells)
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat)

DimPlot(seurat, group.by = "time_point")

4 Cluster

seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6108
## Number of edges: 210001
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8511
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(seurat)

Cluster 2 can be further stratified:

seurat <- FindSubCluster(
  seurat,
  cluster = "2",
  graph.name = "RNA_snn",
  subcluster.name = "subcluster_2",
  resolution = 0.4
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1572
## Number of edges: 47932
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7805
## Number of communities: 4
## Elapsed time: 0 seconds
seurat$final_clusters <- seurat$subcluster_2
Idents(seurat) <- "final_clusters"
DimPlot(seurat)

5 Save

saveRDS(seurat, path_to_save)

6 Session Info

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.6          purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         ggplot2_3.3.3        tidyverse_1.3.1      harmony_1.0          Rcpp_1.0.6           SeuratWrappers_0.3.0 SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.18.1    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2        splines_4.0.4         listenv_0.8.0         scattermore_0.7       digest_0.6.27         htmltools_0.5.1.1     fansi_0.5.0           magrittr_2.0.1        tensor_1.5            cluster_2.1.1         ROCR_1.0-11           remotes_2.4.0         globals_0.14.0        modelr_0.1.8          matrixStats_0.59.0    spatstat.sparse_2.0-0 colorspace_2.0-1      rvest_1.0.0           ggrepel_0.9.1         haven_2.4.1           xfun_0.23             crayon_1.4.1          jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-10       zoo_1.8-9             glue_1.4.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.8          future.apply_1.7.0    abind_1.4-5           scales_1.1.1          DBI_1.1.1             miniUI_0.1.1.1        viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20       spatstat.core_2.1-2   rsvd_1.0.5            htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0            uwot_0.1.10           dbplyr_2.1.1         
##  [55] deldir_0.2-10         here_1.0.1            utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1      rlang_0.4.11          reshape2_1.4.4        later_1.2.0           munsell_0.5.0         cellranger_1.1.0      tools_4.0.4           cli_2.5.0             generics_0.1.0        broom_0.7.7           ggridges_0.5.3        evaluate_0.14         fastmap_1.1.0         yaml_2.2.1            goftest_1.2-2         knitr_1.33            fs_1.5.0              fitdistrplus_1.1-5    RANN_2.6.1            pbapply_1.4-3         future_1.21.0         nlme_3.1-152          mime_0.10             xml2_1.3.2            compiler_4.0.4        rstudioapi_0.13       plotly_4.9.4          png_0.1-7             spatstat.utils_2.2-0  reprex_2.0.0          bslib_0.2.5.1         stringi_1.6.2         highr_0.9             RSpectra_0.16-0       lattice_0.20-41       Matrix_1.3-4          vctrs_0.3.8           pillar_1.6.1          lifecycle_1.0.0       BiocManager_1.30.15   spatstat.geom_2.1-0   lmtest_0.9-38         jquerylib_0.1.4       RcppAnnoy_0.0.18      data.table_1.14.0     cowplot_1.1.1         irlba_2.3.3           httpuv_1.6.1          patchwork_1.1.1       R6_2.5.0             
## [109] bookdown_0.22         promises_1.2.0.1      KernSmooth_2.23-18    gridExtra_2.3         parallelly_1.26.0     codetools_0.2-18      MASS_7.3-53.1         assertthat_0.2.1      rprojroot_2.0.2       withr_2.4.2           sctransform_0.3.2     mgcv_1.8-36           parallel_4.0.4        hms_1.1.0             grid_4.0.4            rpart_4.1-15          rmarkdown_2.8         Rtsne_0.15            shiny_1.6.0           lubridate_1.7.10